httptools.pyo (Digsby build 37, lib/util) — Python compiled bytecode, 2008-10-13, decompiled listing below.
# Source Generated with Decompyle++
# File: in.pyo (Python 2.5)
from __future__ import with_statement
from callbacks import callsback
from threads import threaded
from threads.timeout_thread import Timer
from net import build_opener, build_cookie
import re
import StringIO
import cookielib
import urllib2
import logging
import lxml.etree as ET
import lxml.html as HTML
import operator
from contextlib import closing
itemgetter0 = operator.itemgetter(0)
log = logging.getLogger('httptools')
class RequestOpener(object):
    '''
    Opens a urllib2 request through the supplied opener, retrying failed
    attempts and following HTTP (301/302) and javascript redirects.
    '''
    retries = 3
    pause_for_attempts = 1    # delay between retry attempts

    # regexes that locate javascript redirects in a page body, paired with the
    # match group holding the target URL
    js_redirect_res = ((re.compile('window.location.replace\\("(.*?)"\\);'), 1),)

    def __init__(self, opener, request, data = None, **kwds):
        # accept either an opener object or a bare open function
        self.openfunc = getattr(opener, 'open', opener)

        retries = kwds.pop('retries', None)
        if retries is not None:
            self.retries = retries

        # string URLs are turned into Request objects
        if isinstance(request, basestring):
            request = urllib2.Request.make_request(request, data, **kwds)

        self.request = request
        self._sub_requester = None
        self.callback = None

    @callsback
    def open(self, callback = None):
        if self.callback is not None:
            raise Exception('Request already in progress')

        self.callback = callback
        self._attempt_open()

    def _attempt_open(self):
        self.openfunc(self.request, success = self._check_success, error = self._check_error)
    def preprocess_response(self, resp):
        # Read the body once, then swap the response's file methods for an
        # in-memory copy so callers can re-read and seek the data.
        with closing(resp):
            data = resp.read()

        sio = StringIO.StringIO(data)
        for attr in ('read', 'seek', 'close', 'tell'):
            setattr(resp, attr, getattr(sio, attr))

        resp._stringio = sio
        resp.content = data
        return resp

    def _check_success(self, resp):
        resp = self.preprocess_response(resp)
        try:
            self.redirect(resp)
        except Exception:
            # No redirect found in the response; treat it as the final answer
            # unless check_resp_for_errors flags a problem.
            error = self.check_resp_for_errors(resp)
            if error is None:
                self.finish('success', resp)
            else:
                self._on_error(error)
    def _redirect_success(self, resp):
        self._sub_requester = None
        self.finish('success', resp)

    def _redirect_error(self, err = None):
        self._sub_requester = None
        self._on_error(err)

    def redirect(self, resp):
        if self._sub_requester is not None:
            raise Exception('Redirect already in progress')

        redirect = self.make_redirect_request(resp)
        new = self._sub_requester = type(self)(self.openfunc, redirect)
        new.open(success = self._redirect_success, error = self._redirect_error)

    def make_redirect_request(self, resp):
        # Look for an HTTP Location header first, then a javascript
        # window.location.replace(...) redirect in the page body.
        for redirecter in (self._find_http_redirect, self._find_js_redirect):
            redirect = redirecter(resp)
            if redirect is not None:
                if not redirect.startswith('http'):
                    # relative URL: rebuild it against the original request
                    if not redirect.startswith('/'):
                        redirect = '/' + redirect
                    redirect = self.request.get_type() + '://' + self.request.get_host() + redirect
                log.debug('got redirect: %r', redirect)
                return redirect

        raise Exception("Couldn't find URL for redirect in %r" % resp.content)
    def _find_http_redirect(self, resp):
        if resp.code in (301, 302):
            return resp.headers.get('Location', None)

    def _find_js_redirect(self, resp):
        for redirect_re, url_group_id in self.js_redirect_res:
            match = redirect_re.search(resp.content)
            if match:
                new_url = match.group(url_group_id)
                if new_url:
                    return new_url

    def check_resp_for_errors(self, resp):
        # Subclasses may return an error object here to force a retry.
        pass

    def _check_error(self, err = None):
        self._on_error(err)

    def _on_error(self, e = None):
        self.retries -= 1
        if self.retries:
            if self.pause_for_attempts > 0:
                Timer(self.pause_for_attempts, self._attempt_open).start()
            else:
                self._attempt_open()
        else:
            self.finish('error', e)

    def finish(self, result, *args):
        cb = self.callback
        self.callback = None
        self._sub_requester = None
        getattr(cb, result, lambda *a: None)(*args)
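# Illustrative usage sketch (not part of the original module; the names
# fetch_page, on_page, and on_error are hypothetical).  RequestOpener wraps an
# opener's open() with retries and redirect-following; the callsback decorator
# on open() turns success=/error= keyword arguments into the callback object
# stored in self.callback:
#
#     def fetch_page(opener, url):
#         def on_page(resp):
#             log.info('fetched %d bytes from %s', len(resp.content), url)
#         def on_error(err = None):
#             log.error('request for %s failed: %r', url, err)
#         RequestOpener(opener, url, retries = 2).open(success = on_page, error = on_error)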
def dispatcher(what, arg_getter):
    def dispatch(self, *args):
        name = arg_getter(args)
        handler = getattr(self, '%s_%s' % (what, name), getattr(self, '%s_default' % what, None))
        if handler is not None:
            return handler(*args)
        else:
            log.error('No default handler for %r', what)

    return dispatch
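# How dispatcher() routes calls (illustrative sketch; the Demo class below is
# hypothetical and not part of the original module).  A method built by
# dispatcher('handle', itemgetter0) looks up 'handle_<first arg>' on the
# instance and falls back to 'handle_default':
#
#     class Demo(object):
#         handle = dispatcher('handle', itemgetter0)
#
#         def handle_foo(self, name):
#             return 'specific handler for %r' % name
#
#         def handle_default(self, name):
#             return 'fallback handler for %r' % name
#
#     # Demo().handle('foo') -> "specific handler for 'foo'"
#     # Demo().handle('bar') -> "fallback handler for 'bar'"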
# 'sentinel' is a unique "no default supplied" marker defined elsewhere in the
# Digsby codebase; a local fallback keeps this module importable on its own.
try:
    sentinel
except NameError:
    sentinel = object()


class WebScraper(object):
    CookieJarFactory = cookielib.CookieJar
    HttpOpenerFactory = staticmethod(build_opener)
    RequestFactory = staticmethod(urllib2.Request.make_request)

    domain = None
    urls = { }

    def __init__(self):
        self._waiting = set()
        self._callbacks = { }
        self.init_http()

    def init_http(self):
        self._jar = self.CookieJarFactory()
        self.http = self.HttpOpenerFactory(urllib2.HTTPCookieProcessor(self._jar))

    def get_cookie(self, key, default = sentinel, domain = None, path = '/'):
        if domain is None:
            domain = self.domain

        val = default
        try:
            with self._jar._cookies_lock:
                val = self._jar._cookies[domain][path][key].value
        except (AttributeError, KeyError), e:
            # cookie not found: raise unless the caller supplied a default
            if val is sentinel:
                raise e
            return val

        return val
    def set_cookie(self, key, value, domain = None, path = '/'):
        if domain is None:
            domain = self.domain

        with self._jar._cookies_lock:
            domain_dict = self._jar._cookies.setdefault(domain, { })
            path_dict = domain_dict.setdefault(path, { })
            cookie = path_dict.get(key, None)
            if cookie is None:
                cookie = build_cookie(key, value, domain = domain, path = path)
                path_dict[key] = cookie
            else:
                cookie.value = value

    def set_waiting(self, *things):
        self._waiting.update(things)

    def clear_waiting(self, *things):
        self._waiting -= set(things)
        if not self._waiting:
            self.done_waiting()

    def done_waiting(self):
        pass
    @callsback
    def request(self, name, callback = None):
        if name in self._waiting:
            log.warning('already waiting for %r', name)
            return None

        self._callbacks[name] = callback
        req = self.build_request(name)
        self.perform_request(name, req)

    def perform_request(self, name, req):
        self.set_waiting(name)
        if req is None:
            return self.error_handler(name)(Exception('No request created for %r' % name))

        reqopen = RequestOpener(threaded(self.http.open), req)
        reqopen.open(success = self.success_handler(name), error = self.error_handler(name))

    def error_handler(self, name):
        def handler(e = (None, None)):
            self.clear_waiting(name)
            cb = self._callbacks.pop(name, None)
            retval = self.handle_error(name, e)
            if cb is not None:
                cb.error(e)
            return retval

        return handler

    def success_handler(self, name):
        def handler(resp):
            self.clear_waiting(name)
            resp = self.preprocess_resp(name, resp)
            newresp = self.handle_success(name, resp)
            if newresp is not None:
                resp = newresp

            cb = self._callbacks.pop(name, None)
            if cb is not None:
                cb.success(resp)

            return newresp

        return handler
    build_request = dispatcher('build_request', itemgetter0)
    handle_error = dispatcher('handle_error', itemgetter0)
    preprocess_resp = dispatcher('preprocess_resp', itemgetter0)
    handle_success = dispatcher('handle_success', itemgetter0)

    def build_request_default(self, name):
        link = self.urls[name]
        if callable(link):
            link = link()
        return self.RequestFactory(link)

    def handle_error_default(self, name, e):
        log.error('Error requesting %r: %r', name, e)

    def handle_success_default(self, name, resp):
        if resp.document is not None:
            print HTML.tostring(resp.document, pretty_print = True)
        else:
            print 'Got None for lxml doc. code/status= %r' % ((resp.code, resp.msg, str(resp.headers)),)

    def preprocess_resp_default(self, name, resp):
        data = resp.content
        if data:
            document = HTML.fromstring(data, base_url = resp.geturl())
            document.make_links_absolute()
            resp.document = document
        else:
            resp.document = None

        return resp


if __name__ == '__main__':
    pass
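# Illustrative sketch of how WebScraper is meant to be subclassed (the
# ExampleScraper name and URL are hypothetical, not from the original code).
# A subclass supplies a urls mapping plus optional per-name handlers; calling
# request('news') builds the request via build_request_default, fetches it on
# a background thread through RequestOpener, parses the body into an lxml
# document in preprocess_resp_default, and dispatches to handle_success_news:
#
#     class ExampleScraper(WebScraper):
#         domain = 'example.com'
#         urls = {'news': 'http://example.com/news'}
#
#         def handle_success_news(self, name, resp):
#             for element, attr, link, pos in resp.document.iterlinks():
#                 log.info('found link: %s', link)
#
#     scraper = ExampleScraper()
#     scraper.request('news')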